import HTMLParser
import urllib
import re


# Page that is downloaded and parsed by the script at the bottom of this file
# (presumably the member list of the Netnod internet exchange - confirm).
urlString = "http://www.netnod.se/ix/members"
# Accumulator filled by Parse44.handle_data: AS-number strings, each stored
# once, in order of first appearance.
urlText = []
# Output file; it is opened in append mode, so earlier content is preserved.
pathOut = "ixp_members.txt"


class Parse44 (HTMLParser.HTMLParser):
    """Collect AS numbers from table cells into the module-level ``urlText``.

    A text chunk is harvested when it is the first chunk after a ``<td>``
    start tag, a ``<td class="Stockholm">`` has been seen and not yet
    consumed, and the text is a list of integers separated by ", ".
    """

    # Armed by every <td> start tag; disarmed after the next text chunk.
    check1 = False
    # Used to select only the Stockholm ASes: armed by <td class="Stockholm">
    # and disarmed only once a matching AS-number list has been harvested.
    check2 = False

    def handle_starttag (self, tag, attrs):
        """Arm the flags when a ``<td>`` (optionally class "Stockholm") opens."""
        if tag != "td":
            return
        self.check1 = True
        if any(name == "class" and value == "Stockholm"
               for name, value in attrs):
            self.check2 = True

    def handle_data (self, data):
        """Append the not-yet-seen AS numbers found in *data* to ``urlText``."""
        armed = self.check1 and self.check2
        # Only the first text chunk after a <td> start tag is ever considered.
        self.check1 = False
        if not (armed and re.match(r"^\d+(, \d+)*$", data)):
            return
        # Several AS numbers may appear together, separated by ", ".
        for number in data.split(", "):
            if number not in urlText:
                urlText.append(number)
        self.check2 = False


# Download the page, let Parse44 fill urlText, then append one "44 <asn>"
# line per collected AS number to the output file.
lparser = Parse44()
response = urllib.urlopen(urlString)
try:
    lparser.feed(response.read())
finally:
    # Release the connection even if reading or parsing fails.
    response.close()

# The with-statement guarantees the file is flushed and closed even if a
# write raises part-way through.
with open(pathOut, "a") as fileOut:
    for item in urlText:
        fileOut.write("44 %s\n" % item)